Exploration
# Verify Data type
sunspot_TS %>% class()
## [1] "ts"
# Summarize the time series object
sunspot_TS %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 24.57 65.55 78.53 115.47 269.30
# Create box plot to visualize sunspot number
sunspot_TS %>% boxplot(horizontal = TRUE,
main = "Figure 1: Yearly Mean Sunspot Number (1700-2023)",
xlab = "Mean Sunspot Number",
border = "black")
# Add grid
grid(nx = NULL, ny = NULL, col = "gray", lty = "dotted")

ACF and PACF
# Set up a 1x2 layout for plots
par(mfrow=c(1,2))
# Plot ACF plot.
sunspot_TS %>% acf(lag.max = 70,
main = "Figure 3.1: Autocorrelation Function (ACF) of Temperature Anomalies",
xlab = "Lag",
ylab = "ACF")
# Plot PACF plot.
sunspot_TS %>% pacf(lag.max = 70,
main = "Figure 3.2: Partial Autocorrelation Function (PACF) of Temperature Anomalies",
xlab = "Lag",
ylab = "PACF")

# Augmented Dickey-Fuller Test for stationarity
sunspot_TS %>% adf.test()
## Warning in adf.test(.): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: .
## Dickey-Fuller = -5.3319, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Phillips-Perron Unit Root Test for stationarity
sunspot_TS %>% pp.test()
## Warning in pp.test(.): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: .
## Dickey-Fuller Z(alpha) = -83.347, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary
# Plotting a QQ plot for anomaly_TS
sunspot_TS %>% qqnorm(main = "Figure 4: Normal QQ Plot of Anomaly Series")
sunspot_TS %>% qqline(col='blue', lty=2)

# Perform Shapiro-Wilks test for normality.
rawTS_shapiro <- sunspot_TS %>% shapiro.test()
rawTS_shapiro
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.92106, p-value = 4.853e-12
Box-Cox Transformation
# Adding a small positive value to ensure non-zero data
positive_sunspot_TS <- sunspot_TS + (abs(min(sunspot_TS)) + 0.01)
# Applying Box-Cox transformation
BC <- positive_sunspot_TS %>% BoxCox.ar(lambda = seq(-2, 2, 0.01), method = 'yule-walker')
# Add grid
grid(nx = NULL, ny = NULL, col = "gray", lty = "dotted")
# Add main title separately
title(main = 'Figure 5: Optimal Lambda value', adj = 0.5, line = 1.4)

# Use Log-likelihood estimation to find optimal lambda values
lambda <- BC$lambda[which(max(BC$loglike) == BC$loglike)]
# Print results
cat("Lower bound Confidence Interval: ", BC$ci[1],
"\nUpper bound Confidence Interval: ", BC$ci[2],
"\n\nOptimal Lambda value: ", lambda)
## Lower bound Confidence Interval: 0.44
## Upper bound Confidence Interval: 0.56
##
## Optimal Lambda value: 0.49
# Set up a 1x2 layout for plots
par(mfrow=c(2,2))
# Plot original anomaly series across time
sunspot_TS %>% plot(type = 'o',
main = "Figure 6.1: Original Land Temperature Anomalies (1850-2023)",
xlab = "Year",
ylab = "Temperature Anomalies (°C)")
# Add grid
grid(nx = NULL, ny = NULL, col = "gray", lty = "dotted")
# Plot Box-Cox Transformed anomaly series across time
BC_sunspot_TS <- ((positive_sunspot_TS^lambda) - 1) / lambda
BC_sunspot_TS %>% plot(type = 'o',
main = 'Figure 6.2: Box-Cox Transformed Anomaly series (1850-2023)',
xlab = 'Year',
ylab = 'Transformed Temperature anomalies (°C)')
# Add grid
grid(nx = NULL, ny = NULL, col = "gray", lty = "dotted")
# Test for normality with Box-Cox transformed series.
BC_TS_Stest <- BC_sunspot_TS %>% shapiro.test()
# Plotting a QQ plot for anomaly_TS
sunspot_TS %>% qqnorm(main = "Figure 6.3: Normal QQ Plot of Original Anomaly Series")
sunspot_TS %>% qqline(col='blue', lty=2) %>%
text(x = 1, y = -0.4,
labels = paste0("Shapiro-Wilks (p-value): ", rawTS_shapiro[2]),
col='blue', cex=1.5)
grid(nx = NULL, ny = NULL, col = "gray", lty = "dotted")
# QQ-Plot Box-Cox transformed series
BC_sunspot_TS %>% qqnorm(main = "Figure 6.4: Normal QQ Plot of Box-Cox Anomaly Series")
BC_sunspot_TS %>% qqline(lty=2, col='blue') %>%
text(x = 1.2, y = -1.255,
labels = paste0("Shapiro-Wilks (p-value): ", BC_TS_Stest[2]),
col='blue', cex=1.5)
grid(nx = NULL, ny = NULL, col = "gray", lty = "dotted")

# Set up a 1x2 layout for plots
par(mfrow=c(1,2))
# Plot ACF of differenced series
BC_sunspot_TS %>% acf(main = "Figure 8.1: Autocorrelation Function (ACF) of differenced Anomaly series",
xlab = "Lag",
ylab = "ACF", lag.max = 70)
grid()
# Plot ACF of differenced series
BC_sunspot_TS %>% pacf(main = "Figure 8.2: Partial Autocorrelation Function (PACF) of differenced Anomaly series",
xlab = "Lag",
ylab = "PACF", lag.max = 70)
grid()

model <- Arima(sunspot_TS, order = c(3,0,0), seasonal = list(order=c(2,1,2), period=1))
res_model <- rstandard(model)
plot(res_model, type='o')

par(mfrow=c(1,2))
res_model %>% acf(lag.max = 100)
res_model %>% pacf(lag.max = 100)

model <- auto.arima(sunspot_TS)
r <- forecast(model)
r %>% plot()

model %>% summary()
## Series: sunspot_TS
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.4808 -0.7720 -0.1858 78.8401
## s.e. 0.0475 0.0428 0.0705 3.9410
##
## sigma^2 = 650.2: log likelihood = -1508.28
## AIC=3026.57 AICc=3026.76 BIC=3045.47
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0006686022 25.34141 19.33997 -Inf Inf 0.6712814 -0.00633822